# Question 1 ----
### Read the global power plant database (readr; suppress column-type message)
df <- read_csv("global_power_plant_database_v_1_3/global_power_plant_database.csv", show_col_types = FALSE)
### Pre-add a continent column for the problem set: map each ISO-3 country
### code in the data to its continent name via countrycode()
df$continent <- countrycode(sourcevar = df$country, # country codes as a plain vector
origin = 'iso3c', # source coding scheme: ISO 3166-1 alpha-3 codes
destination = "continent") # target coding scheme: continent names
#### Commenting out the code block dealing with geocoding
### Getting the values for only United States
# df_US <- df %>%
# filter(country == "USA")
# address <- df_US %>%
# reverse_geo(lat = df_US$latitude, long = df_US$longitude, method = 'osm')
## Converting to Dataframe and renaming the columns
# adr <- tibble(Lat = address[1], Long = address[2], Address = address[3])
#
# adr <- adr %>%
# rename(latitude = Lat, longitude = Long, Address = Address)
#
# ## Unlisting the elements
# adr$latitude <- unlist(adr$latitude)
# adr$longitude <- unlist(adr$longitude)
# adr$Address <- unlist(adr$Address)
#
# ## Joining the tables
# df_US <- df_US %>%
# select(name, capacity_mw, latitude, longitude) %>%
# left_join(adr, by = c('latitude' = 'latitude', 'longitude' = 'longitude'))
# ### Filtering out only the required States
# states <- df_US$Address
#
# ### Getting the address in Virginia and West Virginia
# ind_vir <- str_which(states, "Virginia,")
#
# ### Getting the address in Maryland
# ind_mary <- str_which(states, "Maryland,")
#
# ### Getting the address in DC
# ind_DC <- str_which(states, "District of Columbia,")
#
# ### Filtering out the required rows
# states_ind <- sort(c(ind_vir, ind_mary, ind_DC))
# df_US_dmv <- df_US[states_ind, ]
#
# ### Adding a State column
# for(row in 1:nrow(df_US_dmv)) {
# if(length(str_subset(df_US_dmv$Address[row], "West Virginia,"))){
# df_US_dmv$region[row] = "west virginia"
# df_US_dmv$group[row] = 4
#
# }else if(length(str_subset(df_US_dmv$Address[row], ", Virginia,"))) {
# df_US_dmv$region[row] = "virginia"
# df_US_dmv$group[row] = 3
#
# }else if(length(str_subset(df_US_dmv$Address[row], "Maryland,"))) {
# df_US_dmv$region[row] = "maryland"
# df_US_dmv$group[row] = 2
#
# }else {
# df_US_dmv$region[row] = "district of columbia"
# df_US_dmv$group[row] = 1
# }
# }
#
# ### Renaming to lat and long, needed for mapping! Also filtering outliers
# df_US_dmv <- df_US_dmv %>%
# rename(lat = latitude, long = longitude) %>%
# filter(lat < 42)
#
# ### Saving the dataframe for future use
# write.csv(df_US_dmv, "Pset2_Q1.csv", row.names = F)
### Getting data for US cities (us.cities dataset from the maps package)
data("us.cities")
### Loading in the saved geocoded data (produced by the commented-out block above)
df_US_dmv <- read.csv("Pset2_Q1.csv")
### Keep only the major DMV-area cities we want to label on the map;
### us.cities names carry the state abbreviation suffix (e.g. "Richmond VA")
major_cities <- us.cities %>%
filter(name %in% c("Huntington WV", "Charleston WV", "Frederick MD", "Bel Air South MD", "Dale City VA", "Harrisonburg VA", "Lynchburg VA", "Roanoke VA", "Blacksburg VA", "Danville VA", "Suffolk VA", "Portsmouth VA", "Newport News VA", "Richmond VA", "Tuckahoe VA", "Hampton VA", "Norfolk VA", "Virginia Beach VA", "Chesapeake VA"))
### Mapping: build the base county/state outline map for the DMV region
regions <- c("virginia", "maryland", "west virginia", "district of columbia")
state_projection <- map_data('state', region = regions) ### State boundary polygons
county_projection <- map_data('county', region = regions) ### County boundary polygons
base_map <- ggplot(county_projection, aes(x = long, y = lat)) + ### Get the county information in
geom_polygon(aes(group = group), fill = "white", color = "lightgrey") + ### Map the counties
geom_path(data = state_projection, aes(x = long, y = lat, group = group), color = "black") + ### Get the state information and map it
coord_map("polyconic") ### Type of Projection
### Layer power plant locations and city labels onto the base map
map_1 <- base_map +
geom_point(data = df_US_dmv, aes(color = region, size = capacity_mw), alpha = 0.4) + ### Powerplant locations
geom_text_repel(data = major_cities, aes(label = name), size = 2.3) + ## Cities names
labs(title = "Maryland has Largest Number of Powerplants ", subtitle = "Locations of powerplants in DC, Maryland, Virginia and West Virginia", color = 'State', size = 'Capacity (MW)') +
### Use a NAMED labels vector so each region maps to the correct display
### label regardless of factor-level ordering (positional labels would
### silently mislabel states if the alphabetical order ever changed)
scale_color_discrete(labels = c("district of columbia" = "District of Columbia",
"maryland" = "Maryland",
"virginia" = "Virginia",
"west virginia" = "West Virginia")) +
theme_void() ### Removing all unneeded ink
#ggsave("Map_1.png", map_1, dpi = 200) ## Saving the map
map_1

# Question 2 ----
### Pulling in the data from Census API (tidycensus); key must be set once
#census_api_key("")
options(tigris_use_cache = TRUE) ### Cache downloaded shapefiles between runs
### County-level median household income (variable B19013_001) for Virginia,
### with geometry attached for mapping; year 2020 => 2016-2020 5-year ACS
virginia_county <- get_acs(
state = "VA",
geography = "county",
variables = "B19013_001",
geometry = TRUE,
year = 2020
)
## Getting data from the 2016-2020 5-year ACS
### Localities making up the Northern Virginia region. Independent cities
### carry a lowercase "city" suffix, matching the ACS NAME strings.
northern_virginia_counties <- c(
  "Alexandria city", "Arlington County", "Clarke County", "Culpeper County",
  "Fairfax city", "Fairfax County", "Falls Church city", "Fauquier County",
  "Frederick County", "Fredericksburg city", "Loudoun County",
  "Madison County", "Manassas city", "Manassas Park city",
  "Prince William County", "Rappahannock County", "Spotsylvania County",
  "Stafford County", "Warren County", "Winchester city"
)
### Extract the locality name -- the text before the first comma in NAME
### (e.g. "Fairfax County, Virginia" -> "Fairfax County") -- and keep only
### the Northern Virginia localities. sub() replaces the original
### strsplit/as.data.frame/t/pull pipeline with one vectorized call.
NV_data <- virginia_county %>%
mutate(County = sub(",.*$", "", NAME)) %>%
filter(County %in% northern_virginia_counties)
### Getting the Median of the Median Household Estimates of Northern Virginia
nova_est <- median(NV_data$estimate)
### Getting the geometry for Northern Virginia Region: summarize() with no
### arguments dissolves all selected counties into a single feature
nv_union <- NV_data %>%
summarize()
nv_union <- st_cast(nv_union, "MULTIPOLYGON") ### Casting to MULTIPOLYGON as summarize converts to POLYGON
### Creating dataframe with only the aggregated Northern Virginia row
nv_final <- data.frame(NAME = "Northern Virginia", estimate = nova_est, geometry = nv_union$geometry)
### Final dataframe with all counties EXCLUDING the Northern Virginia ones
### (County name = text before the first comma in NAME; see extraction above)
map_df <- virginia_county %>%
mutate(County = sub(",.*$", "", NAME)) %>%
filter(!County %in% northern_virginia_counties) %>%
select(NAME, estimate, geometry)
### Process to add Northern Virginia Record to map_df
### Source: https://github.com/r-spatial/sf/issues/588
### Step 1 : Convert both data frames to data.table
### Step 2 : Using data.table::rbindlist() to combine them
### Step 3 : Convert back to sf using st_sf()
map_df <- setDT(map_df)
nv_final <- setDT(nv_final)
### rbindlist takes a list; fill = TRUE pads any non-shared columns with NA
map_df <- data.table::rbindlist(list(map_df, nv_final), fill = TRUE)
map_df <- st_sf(map_df)
### Mapping: choropleth of median household income across Virginia
nv_final <- st_sf(nv_final) ### Converting back to sf object (needed for geom_sf_text below)
### Shared fill limits so the NoVA aggregate and the counties use one scale
min_income <- min(map_df$estimate)
max_income <- max(map_df$estimate)
divide_by <- 1000 ### For legend formatting (show income in thousands, "$75K")
map_2 <- map_df %>%
ggplot(aes(fill = estimate/divide_by)) +
geom_sf(color = NA) +
scale_fill_viridis_c(option = "magma", limits = c(min_income, max_income)/divide_by, labels = label_dollar(suffix = 'K')) +
geom_sf_text(data = nv_final, label = "NoVA") + ### Label the merged NoVA region
labs(title = "Northern Virginia has some of the Highest Median Incomes in the State", subtitle = "Median household incomes across counties in Virginia", fill = "Median Income") +
theme_void() ### Removing all unneeded ink
map_2

#ggsave("Map_2.png", map_2, dpi = 200)
# Question 3 ----
### Getting the 2016 election data and computing a signed Trump margin:
### positive where Trump won, negative where he lost
election_data <- election
election_data$Trump_Pct_Margin <- ifelse(election_data$winner == "Trump",
election_data$pct_margin,
-election_data$pct_margin) # unary minus instead of `0 - x`
### Hex-tile state geometry from the tilegramR package (one tile per state)
state_shape <- sf_NPR1to1
### Joining the tables: attach results to each state tile (state abbrev key)
states_election <- left_join(state_shape, election_data, by = c("state" = "st"))
## NOTE: sf may warn "old-style crs object detected; please recreate object
## with a recent sf::st_crs()" here -- harmless, comes from the package data
### Mapping: hex tilegram, fill = signed margin, outline color = winner
map_3 <- states_election %>%
ggplot() +
geom_sf(aes(fill = Trump_Pct_Margin, color = winner)) +
geom_sf_text(aes(label = state), color = "white") + ### State abbreviation on each tile
### NOTE(review): positional colors assume winner levels sort as
### Clinton (blue) then Trump (red) -- confirm against the election data
scale_color_manual(values = c("blue", "red")) +
### Diverging fill centered at 0: blue = Clinton margin, red = Trump margin
scale_fill_gradient2(
low = "darkblue",
mid = "grey",
high = "darkred",
midpoint = 0.0,
labels = label_number(accuracy = 0.1, suffix = '%')
) +
labs(title = "Trump Won in 10 States With Less Than 0.1% Margin", subtitle = "2016 Presidential election results and vote margins", fill = "Trump Margin", color = "Winner") +
theme_void()
#ggsave("Map_3.png", map_3, dpi = 200)
map_3

# Question 4 ----
##### NOTE #####
#### To download tidyquant, the forecast dependency is required, for which the
#### FLIBS variable was set to /opt/homebrew/bin/gcc/12.2.0 to point to gfortran
### Get daily stock prices for the FAANG companies from tidyquant
df_faang<- tq_get(c("META", "AAPL", "AMZN", "NFLX", "GOOG"), ### Ticker symbols for FAANG Companies
get = "stock.prices",
from = "2012-01-01",
to = "2022-11-10")
### Largest single-day trading volume across all tickers (x-axis upper limit)
max_trading_volume <- max(df_faang$volume)
divide_by <- 1000000 ### Scale volumes to millions for axis readability
plot_1 <- ggplot(df_faang, aes(x = volume/divide_by, y = adjusted, color = symbol)) +
### Log scale: daily volumes span several orders of magnitude
scale_x_continuous(trans = "log10", limits = c(1e+06, max_trading_volume)/divide_by, labels = label_number(suffix = "MM", accuracy = 1)) +
scale_y_continuous(labels = label_dollar()) +
### Use a NAMED labels vector so each ticker maps to the correct company
### name regardless of factor-level ordering (positional labels would
### silently mislabel companies if the alphabetical order ever changed)
scale_color_discrete(labels = c(AMZN = "Amazon", AAPL = "Apple", GOOG = "Google", META = "Meta", NFLX = "Netflix")) +
geom_point(alpha = 0.8, size = 7)
### Animate over time (gganimate); {frame_time} interpolates the date into the title
plot_2 <- plot_1 +
transition_time(date) +
ease_aes("linear") +
labs(title = "Adjusted Price and Volume for FAANG Stocks: {frame_time}", x = "Trading Volume", y = "Adjusted Price", color = "") +
shadow_wake(wake_length = 0.5) ### Fading trail of recent positions
animate(plot_2, renderer = magick_renderer(), nframes = 300)
